{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tensorflow使用子类API实现WideDeep模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Wide&Deep是一个典型的多输入单输出的模型，综合利用浅层模型的记忆能力和深层模型的泛化能力：\n",
    "<img src=\"./images/wide_deep.jpg\" style=\"margin-left:0px; width:400px\"/>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "# 使用sklearn现成的波斯顿房价数据集\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "\n",
    "# 做训练集测试集验证集拆分\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# 对数值数据归一化\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. 下载sklearn的波士顿房价数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用这个函数下载波斯顿房价数据集\n",
    "housing = fetch_california_housing()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20640, 8)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data是房子的信息\n",
    "housing[\"data\"].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,\n",
       "         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,\n",
       "         3.78800000e+01, -1.22230000e+02],\n",
       "       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,\n",
       "         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,\n",
       "         3.78600000e+01, -1.22220000e+02],\n",
       "       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,\n",
       "         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,\n",
       "         3.78500000e+01, -1.22240000e+02]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing[\"data\"][:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20640,)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# target是房价\n",
    "housing[\"target\"].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.526, 3.585, 3.521])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing[\"target\"][:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 数据分割与归一化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 分割训练集、测试集、验证集\n",
    "X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target)\n",
    "X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用sklearn对数值数据做归一化\n",
    "scaler = StandardScaler()\n",
    "# 在训练集上做fit和transform\n",
    "X_train = scaler.fit_transform(X_train)\n",
    "\n",
    "# 在测试集和验证集上做transform\n",
    "X_valid = scaler.transform(X_valid)\n",
    "X_test = scaler.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. 把数据分割为Wide部分输入和Deep部分输入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A是0~4共5列，B是2~7共6列\n",
    "X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]\n",
    "\n",
    "# valid和test做同样的处理\n",
    "X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]\n",
    "X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11610, 5)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train_A.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11610, 6)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train_B.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 最终的预估数据：分别取test的前三行\n",
    "X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. 搭建WideDeep模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class WideAndDeepModel(keras.models.Model):\n",
    "    \"\"\"实现一个自己的Model，通过继承实现\"\"\"\n",
    "    \n",
    "    def __init__(self, units=30, activation=\"relu\", **kwargs):\n",
    "        \"\"\"实现init初始化方法，初始化layer等变量\"\"\"\n",
    "        super().__init__(**kwargs)\n",
    "        self.hidden1 = keras.layers.Dense(units, activation=activation)\n",
    "        self.hidden2 = keras.layers.Dense(units, activation=activation)\n",
    "        self.hidden3 = keras.layers.Dense(units, activation=activation)\n",
    "        self.main_output = keras.layers.Dense(1)\n",
    "        \n",
    "    def call(self, inputs):\n",
    "        \"\"\"实现call方法，inputs是一个元组，可以传入多个输入\"\"\"\n",
    "        input_Wide, input_Deep = inputs\n",
    "        \n",
    "        hidden1 = self.hidden1(input_Deep)\n",
    "        hidden2 = self.hidden2(hidden1)\n",
    "        hidden3 = self.hidden3(hidden2)\n",
    "        \n",
    "        concat = keras.layers.concatenate([input_Wide, hidden3])\n",
    "        \n",
    "        return self.main_output(concat)\n",
    "\n",
    "# 创建Model对象\n",
    "model = WideAndDeepModel(30, activation=\"relu\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5. 模型编译、训练、测试、预估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 模型编译\n",
    "model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=1e-3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "363/363 [==============================] - 0s 831us/step - loss: 1.7322 - val_loss: 0.9141\n",
      "Epoch 2/10\n",
      "363/363 [==============================] - 0s 624us/step - loss: 0.7713 - val_loss: 0.7050\n",
      "Epoch 3/10\n",
      "363/363 [==============================] - 0s 614us/step - loss: 0.6504 - val_loss: 0.6331\n",
      "Epoch 4/10\n",
      "363/363 [==============================] - 0s 619us/step - loss: 0.5949 - val_loss: 0.5863\n",
      "Epoch 5/10\n",
      "363/363 [==============================] - 0s 615us/step - loss: 0.5565 - val_loss: 0.5534\n",
      "Epoch 6/10\n",
      "363/363 [==============================] - 0s 619us/step - loss: 0.5293 - val_loss: 0.5292\n",
      "Epoch 7/10\n",
      "363/363 [==============================] - 0s 612us/step - loss: 0.5062 - val_loss: 0.5101\n",
      "Epoch 8/10\n",
      "363/363 [==============================] - 0s 614us/step - loss: 0.4882 - val_loss: 0.4920\n",
      "Epoch 9/10\n",
      "363/363 [==============================] - 0s 610us/step - loss: 0.4716 - val_loss: 0.4796\n",
      "Epoch 10/10\n",
      "363/363 [==============================] - 0s 619us/step - loss: 0.4595 - val_loss: 0.4667\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7ff64ae5e250>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 模型训练\n",
    "model.fit((X_train_A, X_train_B), y_train, epochs=10,\n",
    "    validation_data=((X_valid_A, X_valid_B), y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "162/162 [==============================] - 0s 374us/step - loss: 0.4651\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.46509483456611633"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 模型测试\n",
    "model.evaluate((X_test_A, X_test_B), y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2.6609251],\n",
       "       [2.0604692],\n",
       "       [1.2639178]], dtype=float32)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 模型预估\n",
    "model.predict((X_new_A, X_new_B))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
